scripts/Empirical example NatMath/NatMathPlots.R

library(ggpubr)
library(ggplot2)
library(mirt)
library(TestGardener)

# Ucol is column in U corresponding to a specific item
getAvgBinProb <- function(Ucol, orderedIndexes, binSize, binNo) {
    avgs <- matrix(0, nrow = binNo, ncol = length(unique(Ucol)))

    for (score in sort(unique(Ucol))) {
        for (bin in 1:binNo) {
            startInd <- (bin-1)*binSize+1
            noScore <- sum(Ucol[orderedIndexes[startInd:(startInd+binSize-1)]]==score)
            avgs[bin, score] <- noScore/binSize
        }
    }
    avgs
}

divisors <- function(n) {
    res <- c()
    # iterate from 3 to sqrt(n)
    for (i in 2:n) {
        if (n %% i == 0) {
            res <- c(res, i)
        }
    }
    return(res)
}

load("data/NatMath_fittedmodel.RData")
load("data/NatMath_fittedmodel_mirt.RData")

cycle=10

optthetas <- AnalyzeResult$parList[[cycle]]$theta
parthetas <- fscores(natMathGpcm,
                     method = "EAP",
                     response.pattern = NULL)[, "F1"]
# Wbinsmth.plot(AnalyzeResult$parList[[cycle]]$binctr, AnalyzeResult$parList[[cycle]]$Qvec,
#               AnalyzeResult$parList[[cycle]]$WfdList,
#               NatMath_dataList, Wrng=c(0,7), saveplot = F)

# Create data frames with theta estimates from both models, order by thetas
df <- data.frame(person = 1:nrow(U), sum = rowSums(U),
                 optimal = optthetas, parametric = parthetas)
plot(optthetas, parthetas) # explore ordering
cor(df$sum, df$optimal, method = "spearman")
cor(df$sum, df$parametric, method = "spearman")
cor(df$optimal, df$parametric, method = "spearman")
df_opt <- df[, c(1, 3, 4)]
df_par <- df[, c(1, 4, 4)]
df_opt <- df_opt[order(df_opt$optimal), ]
df_par <- df_par[order(df_par$parametric), ]

divisors(nrow(U)) # 2235/149=15 gives equal number in each bin
bins <- 15
peopleInBins <- 149
binCenters <- seq(149/2235/2, 1, 149/2235)
binCenters <- binCenters*100

# convert U to index
U = U + 1
itemfit(natMathGpcm) # 8, 23 are bad fits, 28 is polytomous with bad fit

percentiles <- seq(0, 100)/100
percentilesopt <- quantile(optthetas, percentiles, type = 7)
percentilespar <- quantile(parthetas, percentiles, type = 7)
percentileval <- seq(0, 100)/15
cycle <- 10

# test for item 28 and optimal computed bin
# get avg prob of each response category within each theta bin
# avgBinProbOpt <- getAvgBinProb(U[, 28], df_opt[, 1], peopleInBins, bins)
# avgBinProbPar <- getAvgBinProb(U[, 28], df_par[, 1], peopleInBins, bins)
# comparePercICCs(item = 28, mirtModel = natMathGpcm, WfdList = AnalyzeResult$parList[[cycle]]$WfdList,
#                         mirtThetas = percentilespar,
#                         bincenters = binCenters, binvalues = avgBinProbOpt,
#                         optThetas = percentilesopt, legend = T, grayscale = F)
# comparePercICCs(item = 28, mirtModel = natMathGpcm, WfdList = AnalyzeResult$parList[[cycle]]$WfdList,
#                 mirtThetas = percentilespar,
#                 bincenters = binCenters, binvalues = avgBinProbPar,
#                 optThetas = percentilesopt, legend = T, grayscale = F)
# comparePercICCsOptimal(item = 28, WfdList = AnalyzeResult$parList[[cycle]]$WfdList,
#                 bincenters = binCenters, binvalues = avgBinProbPar,
#                 optThetas = percentilesopt, legend = T, grayscale = F)
# comparePercICCsMirt(item = 28, mirtModel = natMathGpcm,
#                 mirtThetas = percentilespar, bincenters = binCenters,
#                 binvalues = avgBinProbPar, legend = T, grayscale = F)


pdf("plots/NatMath combined perc ICC plots.pdf")
for (i in 1:32) {
    # get avg prob of each response category within each theta bin
    avgBinProbOpt <- getAvgBinProb(U[, i], df_opt[, 1], peopleInBins, bins)
    avgBinProbPar <- getAvgBinProb(U[, i], df_par[, 1], peopleInBins, bins)
    # optimal computed bin averages
    icc1 <- comparePercICCs(item = i, mirtModel = natMathGpcm, WfdList = AnalyzeResult$parList[[cycle]]$WfdList,
                            mirtThetas = percentilespar,
                            bincenters = binCenters, binvalues = avgBinProbOpt,
                            optThetas = percentilesopt, legend = T, grayscale = F)
    # mirt computed bin averages
    icc2 <- comparePercICCs(item = i, mirtModel = natMathGpcm, WfdList = AnalyzeResult$parList[[cycle]]$WfdList,
                            mirtThetas = percentilespar,
                            bincenters = binCenters, binvalues = avgBinProbPar,
                            optThetas = percentilesopt, legend = T, grayscale = F)
    print(ggarrange(icc1, icc2, ncol = 2, nrow = 1, common.legend = T))
}
dev.off()

pdf("plots/NatMath separate perc ICC plots.pdf")
for (i in 1:32) {
    # get avg prob of each response category within each theta bin
    avgBinProbOpt <- getAvgBinProb(U[, i], df_opt[, 1], peopleInBins, bins)
    avgBinProbPar <- getAvgBinProb(U[, i], df_par[, 1], peopleInBins, bins)
    # optimal
    icc1 <- comparePercICCsOptimal(item = i, WfdList = AnalyzeResult$parList[[cycle]]$WfdList,
                           bincenters = binCenters, binvalues = avgBinProbOpt,
                           optThetas = percentilesopt, legend = T, grayscale = F)
    # mirt
    icc2 <- comparePercICCsMirt(item = i, mirtModel = natMathGpcm,
                        mirtThetas = percentilespar, bincenters = binCenters,
                        binvalues = avgBinProbPar, legend = T, grayscale = F)
    print(ggarrange(icc1, icc2, ncol = 2, nrow = 1, common.legend = T))
}
dev.off()
joakimwallmark/PolyOptimalIRT documentation built on Dec. 21, 2021, 1:16 a.m.